08 August, 2021

Graphics in R

Options

  • Traditional (base) graphics
    • isolated instructions to the device

Options

  • Traditional (base) graphics
    • isolated instructions to the device
  • Grid graphics
    • instruction sets
    • lattice
    • ggplot2

Packages

library(ggplot2) #OR better still library(tidyverse)
library(grid)
library(gridExtra)
library(patchwork)
library(scales)

Graphics infrastructure

  • layers of data driven objects
  • coordinate system
  • scales
  • faceting
  • themes

Motivating data

head(BOD)
##   Time demand
## 1    1    8.3
## 2    2   10.3
## 3    3   19.0
## 4    4   16.0
## 5    5   15.6
## 6    7   19.8
summary(BOD)
##       Time           demand     
##  Min.   :1.000   Min.   : 8.30  
##  1st Qu.:2.250   1st Qu.:11.62  
##  Median :3.500   Median :15.80  
##  Mean   :3.667   Mean   :14.83  
##  3rd Qu.:4.750   3rd Qu.:18.25  
##  Max.   :7.000   Max.   :19.80

ggplot

 p <- ggplot() + #single layer - points
  layer(data = BOD, #data.frame
    mapping = aes(y = demand, x = Time),
    stat = "identity", #use original data
    geom = "point", #plot data as points
    position = "identity",
    params = list(na.rm = TRUE),
    show.legend = FALSE
  )+ #layer of lines
  layer(data = BOD, #data.frame
    mapping = aes(y = demand, x = Time),
    stat = "identity", #use original data
    geom = "line", #plot data as a line
    position = "identity",
    params = list(na.rm = TRUE),
    show.legend = FALSE
  ) +
  coord_cartesian() + #cartesian coordinates
  scale_x_continuous() + #continuous x axis
  scale_y_continuous()  #continuous y axis
p #print the plot

ggplot

ggplot

ggplot(data = BOD, map = aes(y = demand, x = Time)) +
    geom_point() +
    geom_line()

Overview

  • data
p<-ggplot(data = BOD)
  • layers (geoms)
p<-p + geom_point(aes(y = demand, x = Time))
p

Overview

  • data
p<-ggplot(data = BOD)
  • layers (geoms)
p<-p + geom_point(aes(y = demand, x = Time))
  • scales
p <- p + scale_x_sqrt(name = "Time (days)")
p

Layers

Layers

  • layers of data driven objects
    • geometric objects to represent data
    • statistical methods to summarize the data
    • mapping of aesthetics
    • position control

geom_ and stat_

  • coupled together
  • engage either
  • stat_identity

geom_

Arguments

  • data - obvious
  • mapping - aesthetics

If omitted, inherited from ggplot()

  • stat - the stat_ function
  • position - overlapping geoms

geom_

ggplot(data = BOD, aes(y = demand, x = Time)) + geom_point()
#OR
ggplot(data = BOD) + geom_point(aes(y = demand, x = Time))
                                        #OR
ggplot() + geom_point(data = BOD, aes(y = demand, x = Time))

Optional aesthetics mapping

  • alpha - transparency
  • colour - colour of the geometric features
  • fill - colour of the geometric features
  • linetype - fill colour of geometric features
  • size - size of geometric features such as points or text
  • shape - shape of geometric features such as points
  • weight - weightings of values

geom_point

head(CO2)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
## 4   Qn1 Quebec nonchilled  350   37.2
## 5   Qn1 Quebec nonchilled  500   35.3
## 6   Qn1 Quebec nonchilled  675   39.2
summary(CO2)
##      Plant             Type         Treatment       conc          uptake     
##  Qn1    : 7   Quebec     :42   nonchilled:42   Min.   :  95   Min.   : 7.70  
##  Qn2    : 7   Mississippi:42   chilled   :42   1st Qu.: 175   1st Qu.:17.90  
##  Qn3    : 7                                    Median : 350   Median :28.30  
##  Qc1    : 7                                    Mean   : 435   Mean   :27.21  
##  Qc3    : 7                                    3rd Qu.: 675   3rd Qu.:37.12  
##  Qc2    : 7                                    Max.   :1000   Max.   :45.50  
##  (Other):42

geom_point

ggplot(CO2) + geom_point(aes(x = conc, y = uptake), colour = "red")

geom_point

ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))

geom_point

ggplot(CO2) + geom_point(aes(x = conc, y = uptake),
 stat = "summary", fun.y = mean)

Example data sets

head(diamonds)
## # A tibble: 6 x 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
summary(diamonds)
##      carat               cut        color        clarity          depth      
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065   Min.   :43.00  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258   1st Qu.:61.00  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194   Median :61.80  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171   Mean   :61.75  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066   3rd Qu.:62.50  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655   Max.   :79.00  
##                                     J: 2808   (Other): 2531                  
##      table           price             x                y         
##  Min.   :43.00   Min.   :  326   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710   1st Qu.: 4.720  
##  Median :57.00   Median : 2401   Median : 5.700   Median : 5.710  
##  Mean   :57.46   Mean   : 3933   Mean   : 5.731   Mean   : 5.735  
##  3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540   3rd Qu.: 6.540  
##  Max.   :95.00   Max.   :18823   Max.   :10.740   Max.   :58.900  
##                                                                   
##        z         
##  Min.   : 0.000  
##  1st Qu.: 2.910  
##  Median : 3.530  
##  Mean   : 3.539  
##  3rd Qu.: 4.040  
##  Max.   :31.800  
## 

Primary geometric objects

geom_bar

Feature geom stat position
Histogram _bar _bin stack

Continuous variable

ggplot(diamonds) + geom_bar(aes(x = carat))

geom_bar

Feature geom stat position
Barchart _bar _bin stack

Categorical variable

ggplot(diamonds) + geom_bar(aes(x = cut))

geom_bar

Feature geom stat position
barchart _bar _bin stack

Multiple categorical variables

ggplot(diamonds) + geom_bar(aes(x = cut, fill = clarity))

geom_bar

Feature geom stat position
barchart _bar _bin dodge

Multiple categorical variables

ggplot(diamonds) + geom_bar(aes(x = cut, fill = clarity),
  position='dodge')

geom_boxplot

Feature geom stat position
boxplot _boxplot _boxplot dodge
ggplot(diamonds) + geom_boxplot(aes(y = carat))

ggplot(diamonds) + geom_boxplot(aes(x=carat))

geom_boxplot

Feature geom stat position
boxplot _boxplot _boxplot dodge
ggplot(diamonds) + geom_boxplot(aes(x = cut, y = carat))

geom_line

Feature geom stat position
line _line _identity identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_line(aes(x = conc, y = uptake))

geom_line

Feature geom stat position
line _line _identity identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_line(aes(x = conc, y = uptake, group = Plant))

geom_line

Feature geom stat position
line _line _identity identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_line(aes(x = conc, y = uptake, color=Plant))

geom_line

Feature geom stat position
line _line _summary identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_line(aes(x = conc, y = uptake),
    stat = "summary", fun.y = mean, color='blue')

geom_point

Feature geom stat position
point _point _identity identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_point(aes(x = conc, y = uptake))

geom_point

Feature geom stat position
point _point _identity identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_point(aes(x = conc, y = uptake, fill = Treatment),
  shape=21)

geom_smooth

Feature geom stat position
smoother _smooth _smooth identity
head(CO2, 3)
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_smooth(aes(x = conc, y = uptake), method = "lm")

geom_smooth

Feature geom stat position
smoother _smooth _smooth identity
head(CO2, 3)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_smooth(aes(x = conc, y = uptake, fill = Treatment))

geom_polygon

Feature geom stat position
polygon _polygon _identity identity
library(maps)
library(mapdata)
aus <- map_data("worldHires", region="Australia")
head(aus,3)
##       long       lat group order    region              subregion
## 1 142.1461 -10.74943     1     1 Australia Prince of Wales Island
## 2 142.1430 -10.74525     1     2 Australia Prince of Wales Island
## 3 142.1406 -10.74113     1     3 Australia Prince of Wales Island
ggplot(aus, aes(x=long, y=lat, group=group)) +
    geom_polygon()

geom_tile

Feature geom stat position
tile _tile _identity identity
head(faithfuld,3) 
## # A tibble: 3 x 3
##   eruptions waiting density
##       <dbl>   <dbl>   <dbl>
## 1      1.6       43 0.00322
## 2      1.65      43 0.00384
## 3      1.69      43 0.00444
ggplot(faithfuld, aes(waiting, eruptions)) +
      geom_tile(aes(fill = density))

geom_raster

Feature geom stat position
raster _raster _identity identity
head(faithfuld,3)
## # A tibble: 3 x 3
##   eruptions waiting density
##       <dbl>   <dbl>   <dbl>
## 1      1.6       43 0.00322
## 2      1.65      43 0.00384
## 3      1.69      43 0.00444
ggplot(faithfuld, aes(waiting, eruptions)) +
      geom_raster(aes(fill = density))

Secondary geometric objects

Example data set

head(warpbreaks)
##   breaks wool tension
## 1     26    A       L
## 2     30    A       L
## 3     54    A       L
## 4     25    A       L
## 5     70    A       L
## 6     52    A       L
summary(warpbreaks)
##      breaks      wool   tension
##  Min.   :10.00   A:27   L:18   
##  1st Qu.:18.25   B:27   M:18   
##  Median :26.00          H:18   
##  Mean   :28.15                 
##  3rd Qu.:34.00                 
##  Max.   :70.00

geom_errorbar

Feature geom stat position
errorbar _identity _identity identity
library(dplyr)
library(gmodels)
warpbreaks.sum <- warpbreaks %>%
    group_by(wool) %>%
    summarise(Mean = mean(breaks),
              Lower=ci(breaks)[2],
              Upper=ci(breaks)[3])
warpbreaks.sum
## # A tibble: 2 x 4
##   wool   Mean Lower Upper
## * <fct> <dbl> <dbl> <dbl>
## 1 A      31.0  24.8  37.3
## 2 B      25.3  21.6  28.9

geom_errorbar

Feature geom stat position
errorbar _identity _identity identity
ggplot(warpbreaks.sum) +
    geom_errorbar(aes(x = wool, ymin = Lower, ymax = Upper), width=0.2)

geom_errorbar

Feature geom stat position
errorbar _identity _summary identity
head(warpbreaks,3)
##   breaks wool tension
## 1     26    A       L
## 2     30    A       L
## 3     54    A       L
ggplot(warpbreaks) + geom_errorbar(aes(x = wool, y = breaks),
    stat = "summary", fun.data = "mean_cl_boot")

geom_linerange

Feature geom stat position
errorbar _identity _identity identity
ggplot(warpbreaks.sum) +
    geom_linerange(aes(x = wool, ymin = Lower, ymax = Upper))

geom_pointrange

Feature geom stat position
errorbar _identity _identity identity
ggplot(warpbreaks.sum) +
    geom_pointrange(aes(y=Mean, x = wool, ymin = Lower, ymax = Upper))

Coordinate systems

Coordinate systems

head(CO2,3)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_point(aes(x = conc, y = uptake))+
  coord_cartesian() #default

Coordinate systems

head(CO2,3)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_point(aes(x = conc, y = uptake))+
  coord_polar()

Coordinate systems

head(CO2,3)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
ggplot(CO2) + geom_point(aes(x = conc, y = uptake))+
 coord_flip()

Coordinate systems

#Orthographic coordinates
library(maps)
library(mapdata)
aus <- map_data("worldHires", region = "Australia")
ggplot(aus, aes(x = long, y = lat, group = group)) +
  coord_map("ortho", orientation=c(-20, 125, 23.5))+
  geom_polygon()

Scales

scale_x_ and scale_y_

Axis titles

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
ggplot(CO2, aes(y = uptake, x = conc)) + geom_point()+
  scale_x_continuous(name = "CO2 conc")

scale_x_ and scale_y_

Axis titles with math

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
ggplot(CO2, aes(y = uptake, x = conc)) + geom_point()+
    scale_x_continuous(name = expression(
         Ambient~CO[2]~concentration~(mg/l)))

scale_x_ and scale_y_

Axis more padding

ggplot(CO2, aes(y = uptake, x = conc)) + geom_point()+
  scale_x_continuous(name = "CO2 conc", expand = c(0,200))

scale_x_ and scale_y_

Axis on a log scale

ggplot(CO2, aes(y = uptake, x = conc)) + geom_point()+
  scale_x_log10(name = "CO2 conc",
    breaks = as.vector(c(1, 2, 5, 10) %o% 10^(-1:2)))

scale_x_ and scale_y_

Axis representing categorical data

ggplot(CO2, aes(y = uptake, x = Treatment)) + geom_point()+
  scale_x_discrete(name = "Treatment")

Other scales

  • size of points (thickness of lines)
  • shape of points
  • linetype of lines
  • color of lines or points
  • fill of shapes

scale_size

Size according to continuous variable

state=data.frame(state.x77, state.region, state.division, state.center) %>%
    select(Illiteracy, state.region, x, y)
head(state,2)
##         Illiteracy state.region         x       y
## Alabama        2.1        South  -86.7509 32.5901
## Alaska         1.5         West -127.2500 49.2500
ggplot(state, aes(y = y, x = x)) + geom_point(aes(size = Illiteracy))

scale_size

Discrete sizes ranging in size from 2 to 4

head(state,2)
##         Illiteracy state.region         x       y
## Alabama        2.1        South  -86.7509 32.5901
## Alaska         1.5         West -127.2500 49.2500
ggplot(state, aes(y = y,x = x)) + geom_point(aes(size = state.region))+
  scale_size_discrete(name = "Region", range = c(2, 10))

scale_size

Manual sizes (2 and 4)

head(state,2)
##         Illiteracy state.region         x       y
## Alabama        2.1        South  -86.7509 32.5901
## Alaska         1.5         West -127.2500 49.2500
ggplot(state, aes(y = y, x = x)) + geom_point(aes(size = state.region))+
    scale_size_manual(name = "Region", values = c(2, 5, 6, 10))

scale_shape

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
ggplot(CO2, aes(y = uptake,x=conc)) + geom_point(aes(shape = Treatment))

scale_shape

CO2 = CO2 %>% mutate(Comb = interaction(Type, Treatment))
CO2 %>% pull(Comb) %>% levels
## [1] "Quebec.nonchilled"      "Mississippi.nonchilled" "Quebec.chilled"        
## [4] "Mississippi.chilled"
ggplot(CO2, aes(y = uptake,x = conc)) + geom_point(aes(shape = Comb))+
  scale_shape_discrete(name = "Type",
   labels=c("Quebec non-chilled","Quebec chilled",
            "Miss. non-chilled","Miss. chilled"))

scale_linetype

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake              Comb
## 1   Qn1 Quebec nonchilled   95   16.0 Quebec.nonchilled
## 2   Qn1 Quebec nonchilled  175   30.4 Quebec.nonchilled
ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth(aes(linetype = Comb)) +
  scale_linetype_discrete(name = "Type")

scale_linetype

head(CO2,2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake              Comb
## 1   Qn1 Quebec nonchilled   95   16.0 Quebec.nonchilled
## 2   Qn1 Quebec nonchilled  175   30.4 Quebec.nonchilled
ggplot(CO2, aes(y = uptake, x = conc)) +
  geom_smooth(aes(linetype = Treatment)) +
  scale_linetype_manual(name = "Treatment", values = c("dashed","dotted"))

scale_fill and scale_color

head(faithfuld,2)
## # A tibble: 2 x 3
##   eruptions waiting density
##       <dbl>   <dbl>   <dbl>
## 1      1.6       43 0.00322
## 2      1.65      43 0.00384
ggplot(faithfuld, aes(waiting, eruptions)) +
    geom_raster(aes(fill = density)) +
    scale_fill_continuous(low = 'red', high = 'blue')

scale_fill and scale_color

head(faithfuld,2)
## # A tibble: 2 x 3
##   eruptions waiting density
##       <dbl>   <dbl>   <dbl>
## 1      1.6       43 0.00322
## 2      1.65      43 0.00384
ggplot(faithfuld, aes(waiting, eruptions)) +
  geom_raster(aes(fill = density)) +
  scale_fill_gradient2(low = 'red', mid = 'white', high = 'blue',
                       midpoint = 0.02)

scale_fill and scale_color

head(faithfuld,2)
## # A tibble: 2 x 3
##   eruptions waiting density
##       <dbl>   <dbl>   <dbl>
## 1      1.6       43 0.00322
## 2      1.65      43 0.00384
ggplot(faithfuld, aes(waiting, eruptions)) +
  geom_raster(aes(fill = density)) +
  scale_fill_gradientn(colours = terrain.colors(10))

scale_fill and scale_color

head(faithfuld,2)
## # A tibble: 2 x 3
##   eruptions waiting density
##       <dbl>   <dbl>   <dbl>
## 1      1.6       43 0.00322
## 2      1.65      43 0.00384
ggplot(faithfuld, aes(waiting, eruptions)) +
  geom_raster(aes(fill = density)) +
  scale_fill_viridis_c(option = 'D')

##also try scale_fill_viridis_b
## also options= 'A', 'B', 'C'

Color palettes

RColorBrewer::display.brewer.all()

Color palettes

Facets

Facets

Panels - matrices of plots

  • facet_wrap
  • facet_grid

Facets

ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))+ 
    facet_wrap(~Plant)

Facets

ggplot(CO2) + geom_line(aes(x = conc, y = uptake, colour = Type))+ 
    facet_wrap(~Plant, scales = 'free_y')

Facets

ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))+ 
    facet_grid(Type~Treatment)

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
grid.arrange(g1,  g2)

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
g1/g2

Multiple plots

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
g2 <- ggplot(CO2) + geom_point(aes(x = Treatment, y = uptake))
g1+g2

Themes

theme_classic

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_classic()

theme_bw

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_bw()

theme_grey

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_grey()

theme_minimal

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_minimal()

theme_linedraw

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_linedraw()

theme_light

ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth() +
    geom_point() +
    theme_light()

others

png('resources/xkcd.png', width=500, height=500, res=200)
library(xkcd)  
library(sysfonts)
library(extrafont)
download.file("http://simonsoftware.se/other/xkcd.ttf", dest="xkcd.ttf")
font_import(".")
loadfonts()
xrange <- range(CO2$conc)
yrange <- range(CO2$uptake)
ggplot(CO2, aes(y = uptake, x = conc)) + geom_smooth(position='jitter', size=1.5) +
    #geom_point() +
        theme_minimal()+theme(text=element_text(size=16, family='xkcd'))+
        xkcdaxis(xrange, yrange)

dev.off()        

others

Saving ggplot figures

Saving ggplot figures

g1 <- ggplot(CO2) + geom_point(aes(x = conc, y = uptake, colour = Type))
ggsave(filename='figure1.pdf', g1,  width=7,  height=5)

Practice

head(state)
##            Illiteracy state.region         x       y
## Alabama           2.1        South  -86.7509 32.5901
## Alaska            1.5         West -127.2500 49.2500
## Arizona           1.8         West -111.6250 34.2192
## Arkansas          1.9        South  -92.2992 34.7336
## California        1.1         West -119.7730 36.5341
## Colorado          0.7         West -105.5130 38.6777

Calculate the mean and 95% confidence interval of Illiteracy per state.region and plot them.

Practice

library(gmodels)
state.sum = state %>% group_by(state.region) %>%
    summarise(Mean = mean(Illiteracy),
              Lower = ci(Illiteracy)[2],
              Upper = ci(Illiteracy)[3])
state.sum
## # A tibble: 4 x 4
##   state.region   Mean Lower Upper
## * <fct>         <dbl> <dbl> <dbl>
## 1 Northeast      1    0.786 1.21 
## 2 South          1.74 1.44  2.03 
## 3 North Central  0.7  0.610 0.790
## 4 West           1.02 0.655 1.39
ggplot(state.sum, aes(y = Mean, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = Lower, ymax = Upper), width=0.1)

Practice

ggplot(state.sum, aes(y = Mean, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.1) +
    scale_x_discrete("Region") +
    scale_y_continuous("Illiteracy rate (%)")+
    theme_classic() +
    theme(axis.line.y = element_line(),
          axis.line.x = element_line())

Practice

library(gmodels)
state.sum = state %>% group_by(state.region) %>%
    mutate(mean_sdl(Illiteracy))
state.sum
## # A tibble: 50 x 6
## # Groups:   state.region [4]
##    Illiteracy state.region       x     y   ymin  ymax
##         <dbl> <fct>          <dbl> <dbl>  <dbl> <dbl>
##  1        2.1 South          -86.8  1.74  0.633 2.84 
##  2        1.5 West          -127.   1.02 -0.194 2.24 
##  3        1.8 West          -112.   1.02 -0.194 2.24 
##  4        1.9 South          -92.3  1.74  0.633 2.84 
##  5        1.1 West          -120.   1.02 -0.194 2.24 
##  6        0.7 West          -106.   1.02 -0.194 2.24 
##  7        1.1 Northeast      -72.4  1     0.443 1.56 
##  8        0.9 South          -75.0  1.74  0.633 2.84 
##  9        1.3 South          -81.7  1.74  0.633 2.84 
## 10        2   South          -83.4  1.74  0.633 2.84 
## 11        1.9 West          -126.   1.02 -0.194 2.24 
## 12        0.6 West          -114.   1.02 -0.194 2.24 
## 13        0.9 North Central  -89.4  0.7   0.417 0.983
## 14        0.7 North Central  -86.1  0.7   0.417 0.983
## 15        0.5 North Central  -93.4  0.7   0.417 0.983
## 16        0.6 North Central  -98.1  0.7   0.417 0.983
## 17        1.6 South          -84.8  1.74  0.633 2.84 
## 18        2.8 South          -92.3  1.74  0.633 2.84 
## 19        0.7 Northeast      -69.0  1     0.443 1.56 
## 20        0.9 South          -76.6  1.74  0.633 2.84 
## 21        1.1 Northeast      -71.6  1     0.443 1.56 
## 22        0.9 North Central  -84.7  0.7   0.417 0.983
## 23        0.6 North Central  -94.6  0.7   0.417 0.983
## 24        2.4 South          -89.8  1.74  0.633 2.84 
## 25        0.8 North Central  -92.5  0.7   0.417 0.983
## 26        0.6 West          -109.   1.02 -0.194 2.24 
## 27        0.6 North Central  -99.6  0.7   0.417 0.983
## 28        0.5 West          -117.   1.02 -0.194 2.24 
## 29        0.7 Northeast      -71.4  1     0.443 1.56 
## 30        1.1 Northeast      -74.2  1     0.443 1.56 
## 31        2.2 West          -106.   1.02 -0.194 2.24 
## 32        1.4 Northeast      -75.1  1     0.443 1.56 
## 33        1.8 South          -78.5  1.74  0.633 2.84 
## 34        0.8 North Central -100.   0.7   0.417 0.983
## 35        0.8 North Central  -82.6  0.7   0.417 0.983
## 36        1.1 South          -97.1  1.74  0.633 2.84 
## 37        0.6 West          -120.   1.02 -0.194 2.24 
## 38        1   Northeast      -77.4  1     0.443 1.56 
## 39        1.3 Northeast      -71.1  1     0.443 1.56 
## 40        2.3 South          -80.5  1.74  0.633 2.84 
## 41        0.5 North Central  -99.7  0.7   0.417 0.983
## 42        1.7 South          -86.5  1.74  0.633 2.84 
## 43        2.2 South          -98.8  1.74  0.633 2.84 
## 44        0.6 West          -111.   1.02 -0.194 2.24 
## 45        0.6 Northeast      -72.5  1     0.443 1.56 
## 46        1.4 South          -78.2  1.74  0.633 2.84 
## 47        0.6 West          -120.   1.02 -0.194 2.24 
## 48        1.4 South          -80.7  1.74  0.633 2.84 
## 49        0.7 North Central  -90.0  0.7   0.417 0.983
## 50        0.6 West          -107.   1.02 -0.194 2.24
ggplot(state.sum, aes(y = y, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.1)

Practice

library(gmodels)
state.sum = state %>% group_by(state.region) %>%
    mutate(mean_cl_boot(Illiteracy))
state.sum
## # A tibble: 50 x 6
## # Groups:   state.region [4]
##    Illiteracy state.region       x     y  ymin  ymax
##         <dbl> <fct>          <dbl> <dbl> <dbl> <dbl>
##  1        2.1 South          -86.8  1.74 1.46  2    
##  2        1.5 West          -127.   1.02 0.731 1.35 
##  3        1.8 West          -112.   1.02 0.731 1.35 
##  4        1.9 South          -92.3  1.74 1.46  2    
##  5        1.1 West          -120.   1.02 0.731 1.35 
##  6        0.7 West          -106.   1.02 0.731 1.35 
##  7        1.1 Northeast      -72.4  1    0.822 1.18 
##  8        0.9 South          -75.0  1.74 1.46  2    
##  9        1.3 South          -81.7  1.74 1.46  2    
## 10        2   South          -83.4  1.74 1.46  2    
## 11        1.9 West          -126.   1.02 0.731 1.35 
## 12        0.6 West          -114.   1.02 0.731 1.35 
## 13        0.9 North Central  -89.4  0.7  0.625 0.775
## 14        0.7 North Central  -86.1  0.7  0.625 0.775
## 15        0.5 North Central  -93.4  0.7  0.625 0.775
## 16        0.6 North Central  -98.1  0.7  0.625 0.775
## 17        1.6 South          -84.8  1.74 1.46  2    
## 18        2.8 South          -92.3  1.74 1.46  2    
## 19        0.7 Northeast      -69.0  1    0.822 1.18 
## 20        0.9 South          -76.6  1.74 1.46  2    
## 21        1.1 Northeast      -71.6  1    0.822 1.18 
## 22        0.9 North Central  -84.7  0.7  0.625 0.775
## 23        0.6 North Central  -94.6  0.7  0.625 0.775
## 24        2.4 South          -89.8  1.74 1.46  2    
## 25        0.8 North Central  -92.5  0.7  0.625 0.775
## 26        0.6 West          -109.   1.02 0.731 1.35 
## 27        0.6 North Central  -99.6  0.7  0.625 0.775
## 28        0.5 West          -117.   1.02 0.731 1.35 
## 29        0.7 Northeast      -71.4  1    0.822 1.18 
## 30        1.1 Northeast      -74.2  1    0.822 1.18 
## 31        2.2 West          -106.   1.02 0.731 1.35 
## 32        1.4 Northeast      -75.1  1    0.822 1.18 
## 33        1.8 South          -78.5  1.74 1.46  2    
## 34        0.8 North Central -100.   0.7  0.625 0.775
## 35        0.8 North Central  -82.6  0.7  0.625 0.775
## 36        1.1 South          -97.1  1.74 1.46  2    
## 37        0.6 West          -120.   1.02 0.731 1.35 
## 38        1   Northeast      -77.4  1    0.822 1.18 
## 39        1.3 Northeast      -71.1  1    0.822 1.18 
## 40        2.3 South          -80.5  1.74 1.46  2    
## 41        0.5 North Central  -99.7  0.7  0.625 0.775
## 42        1.7 South          -86.5  1.74 1.46  2    
## 43        2.2 South          -98.8  1.74 1.46  2    
## 44        0.6 West          -111.   1.02 0.731 1.35 
## 45        0.6 Northeast      -72.5  1    0.822 1.18 
## 46        1.4 South          -78.2  1.74 1.46  2    
## 47        0.6 West          -120.   1.02 0.731 1.35 
## 48        1.4 South          -80.7  1.74 1.46  2    
## 49        0.7 North Central  -90.0  0.7  0.625 0.775
## 50        0.6 West          -107.   1.02 0.731 1.35
ggplot(state.sum, aes(y = y, x = state.region)) + geom_point() +
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.1)

Practice

Overlay illiteracy data onto map of US

library(mapdata)
US <- map_data("worldHires", region = "USA")
ggplot(US) +
    geom_polygon(aes(x = long, y = lat, group = group)) +
    geom_point(data = state, aes(y = y, x = x, size = Illiteracy),
               color = "red")

Practice

Overlay illiteracy data onto map of US

library(mapdata) 
US <- map_data("worldHires", region="USA")
ggplot(US) +
    geom_polygon(aes(x = long, y = lat, group = group)) +
    geom_point(data = state,aes(y = y, x = x, size = Illiteracy),
               color = "red")+
    coord_map(xlim = c(-150, -50), ylim = c(20, 60)) +
    theme_minimal()

Practice

MACNALLY <- read.csv('../data/macnally.csv',
  header = TRUE, row.names = 1, strip.white = TRUE)
head(MACNALLY)
##                 HABITAT GST EYR
## Reedy Lake        Mixed 3.4 0.0
## Pearcedale  Gipps.Manna 3.4 9.2
## Warneet     Gipps.Manna 8.4 3.8
## Cranbourne  Gipps.Manna 3.0 5.0
## Lysterfield       Mixed 5.6 5.6
## Red Hill          Mixed 8.1 4.1

Calculate the mean and standard error of GST and plot them

Practice

Calculate the mean and standard error of GST and plot mean and confidence bars

library(gmodels)
ci(MACNALLY$GST)
##   Estimate   CI lower   CI upper Std. Error 
##   4.878378   4.035292   5.721465   0.415704
MACNALLY.agg = MACNALLY %>% group_by(HABITAT) %>%
  summarize(Mean = mean(GST), Lower = ci(GST)[2], Upper = ci(GST)[3])
ggplot(MACNALLY.agg, aes(y = Mean, x = HABITAT)) +
    geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.1)+
    geom_point() + theme_classic()

Practice

You can also use ggplot’s summary

library(tidyverse)
MACNALLY.melt = MACNALLY %>%
  pivot_longer(-HABITAT, names_to = "variable",  values_to = "value")
ggplot(MACNALLY.melt, aes(y = value, x = HABITAT)) +
    stat_summary(fun.y = "mean", geom = "point")+
    stat_summary(fun.data = "mean_cl_normal", geom = "errorbar",
                 width = 0.1)+
    facet_grid(~variable)

Practice

You can also use ggplot’s summary

#and bootstrapped means..
ggplot(MACNALLY.melt, aes(y = value, x = HABITAT)) +
    stat_summary(fun.y = "mean", geom = "point")+
    stat_summary(fun.data = "mean_cl_boot", geom = "errorbar",
                 width = 0.1)+
    facet_grid(~variable)